package wx.xml.wutka.dtd;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.Enumeration;
import java.util.Hashtable;

/**
 * Parses a DTD file and returns a DTD object.
 *
 * <p>The parser is a hand-written recursive-descent parser that pulls tokens
 * from a {@link Scanner} and fills in a {@link DTD} instance. It registers
 * itself with the scanner as the {@link EntityExpansion} callback so that
 * the scanner can look up entities that have already been declared.
 *
 * <p>Nothing in this class closes the reader or stream it was constructed
 * with.
 *
 * @author Mark Wutka
 * @version $Revision: 1.1 $ $Date: 2004/08/19 05:30:23 $ by $Author: aslom $
 */
public class DTDParser implements EntityExpansion {
    /** Token source for the DTD being parsed. */
    protected Scanner scanner;
    /** The DTD model that is populated while parsing. */
    protected DTD     dtd;
    /**
     * Base location handed to every {@link DTDEntity} that is created: the
     * parent directory ({@link File}) or parent {@link URL} of the DTD, or
     * null when the parser was created from a plain {@link Reader}.
     */
    protected Object  defaultLocation;

    /**
     * Creates a parser that will read from the specified Reader object
     * with token tracing disabled.
     *
     * @param in The input stream to read
     */
    public DTDParser(Reader in) {
        this(in, false);
    }

    /**
     * Creates a parser that will read from the specified Reader object
     *
     * @param in    The input stream to read
     * @param trace True if the parser should print out tokens as it reads them
     *              (used for debugging the parser)
     */
    public DTDParser(Reader in, boolean trace) {
        scanner = new Scanner(in, trace, this);
        dtd = new DTD();
    }

    /**
     * Creates a parser that will read from the specified File object
     * with token tracing disabled.
     *
     * @param in The file to read
     * @throws IOException if the file cannot be opened
     */
    public DTDParser(File in)
        throws IOException {
        this(in, false);
    }

    /**
     * Creates a parser that will read from the specified File object
     *
     * @param in    The file to read
     * @param trace True if the parser should print out tokens as it reads them
     *              (used for debugging the parser)
     * @throws IOException if the file cannot be opened
     */
    public DTDParser(File in, boolean trace)
        throws IOException {
        // The directory holding the DTD becomes the base location for entities.
        defaultLocation = in.getParentFile();

        scanner = new Scanner(new BufferedReader(new FileReader(in)),
            trace, this);
        dtd = new DTD();
    }

    /**
     * Creates a parser that will read from the specified URL object
     * with token tracing disabled.
     *
     * @param in The URL to read
     * @throws IOException if the URL cannot be opened
     */
    public DTDParser(URL in)
        throws IOException {
        this(in, false);
    }

    /**
     * Creates a parser that will read from the specified URL object
     *
     * @param in    The URL to read
     * @param trace True if the parser should print out tokens as it reads them
     *              (used for debugging the parser)
     * @throws IOException if the URL cannot be opened
     */
    public DTDParser(URL in, boolean trace)
        throws IOException {
        //LAM: we need to set the defaultLocation to the directory where
        //the dtd is found so that we don't run into problems parsing any
        //relative external files referenced by the dtd.
        String file = in.getFile();
        // Keep everything up to and including the last '/'; when there is no
        // '/', lastIndexOf returns -1 and the resulting path is empty.
        String directory = file.substring(0, file.lastIndexOf('/') + 1);
        defaultLocation = new URL(in.getProtocol(), in.getHost(), in.getPort(), directory);

        scanner = new Scanner(new BufferedReader(
            new InputStreamReader(in.openStream())), trace, this);
        dtd = new DTD();
    }

    /**
     * Parses the DTD file and returns a DTD object describing the DTD.
     * This invocation of parse does not try to guess the root element
     * (for efficiency reasons)
     *
     * @return the populated DTD, with a null root element
     * @throws IOException if reading fails or the DTD is malformed
     */
    public DTD parse()
        throws IOException {
        return parse(false);
    }

    /**
     * Parses the DTD file and returns a DTD object describing the DTD.
     *
     * @param guessRootElement If true, tells the parser to try to guess the
     *                         root element of the document by process of elimination
     * @return the populated DTD; its root element is set only when guessing
     *         was requested and exactly one candidate remained, otherwise null
     * @throws IOException if reading fails or the DTD is malformed
     */
    public DTD parse(boolean guessRootElement)
        throws IOException {
        while (scanner.peek().type != Scanner.EOF) {
            parseTopLevelElement();
        }

        dtd.rootElement = guessRootElement ? guessRoot() : null;

        return dtd;
    }

    /**
     * Guesses the root element by elimination: starts from every known
     * element and removes each one that is named in the content model of
     * some element.
     *
     * @return the single remaining element, or null if zero or several remain
     */
    private DTDElement guessRoot() {
        Hashtable roots = new Hashtable();

        // Every declared element starts out as a candidate.
        Enumeration e = dtd.elements.elements();
        while (e.hasMoreElements()) {
            DTDElement element = (DTDElement) e.nextElement();
            roots.put(element.name, element);
        }

        // Strike out everything that appears as a child somewhere.
        e = dtd.elements.elements();
        while (e.hasMoreElements()) {
            DTDElement element = (DTDElement) e.nextElement();
            if (!(element.content instanceof DTDContainer)) {
                continue;
            }

            Enumeration items = ((DTDContainer) element.content).
                getItemsVec().elements();
            while (items.hasMoreElements()) {
                removeElements(roots, dtd, (DTDItem) items.nextElement());
            }
        }

        if (roots.size() != 1) {
            return null;
        }
        return (DTDElement) roots.elements().nextElement();
    }

    /**
     * Removes from {@code h} every element name referenced by {@code item},
     * descending recursively into nested containers.
     *
     * @param h    candidate root elements keyed by element name
     * @param dtd  not used by this implementation; only passed along on recursion
     * @param item the content-model item to scan
     */
    protected void removeElements(Hashtable h, DTD dtd, DTDItem item) {
        if (item instanceof DTDName) {
            h.remove(((DTDName) item).value);
        } else if (item instanceof DTDContainer) {
            Enumeration e = ((DTDContainer) item).getItemsVec().elements();

            while (e.hasMoreElements()) {
                removeElements(h, dtd, (DTDItem) e.nextElement());
            }
        }
    }

    /**
     * Consumes one top-level construct: a processing instruction, the start
     * or end of a conditional section, a comment, or a {@code <!...>}
     * declaration. Declarations with an unrecognised keyword are skipped up
     * to the next '&gt;'.
     *
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the token is not valid at the top level
     */
    protected void parseTopLevelElement()
        throws IOException {
        Token token = scanner.get();

        if (token.type == Scanner.LTQUES) {
            // Is <? xxx ?> even valid in a DTD?  I'll ignore it just in case it's there
            StringBuffer textBuffer = new StringBuffer();

            for (; ; ) {
                textBuffer.append(scanner.getUntil('?'));

                // A '?' only terminates the instruction when followed by '>';
                // otherwise it is part of the text.
                if (scanner.peek().type == Scanner.GT) {
                    scanner.get();
                    break;
                }
                textBuffer.append('?');
            }

            dtd.items.addElement(
                new DTDProcessingInstruction(textBuffer.toString()));

            return;
        } else if (token.type == Scanner.CONDITIONAL) {
            token = expect(Scanner.IDENTIFIER);

            if (token.value.equals("IGNORE")) {
                scanner.skipConditional();
            } else if (token.value.equals("INCLUDE")) {
                // Skip to the '[' so the section body is then parsed as
                // ordinary top-level content.
                scanner.skipUntil('[');
            } else {
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid token in conditional: " + token.value,
                    scanner.getLineNumber(), scanner.getColumn());
            }
        } else if (token.type == Scanner.ENDCONDITIONAL) {
            // Don't need to do anything for this token
        } else if (token.type == Scanner.COMMENT) {
            dtd.items.addElement(
                new DTDComment(token.value));
        } else if (token.type == Scanner.LTBANG) {
            token = expect(Scanner.IDENTIFIER);

            if (token.value.equals("ELEMENT")) {
                parseElement();
            } else if (token.value.equals("ATTLIST")) {
                parseAttlist();
            } else if (token.value.equals("ENTITY")) {
                parseEntity();
            } else if (token.value.equals("NOTATION")) {
                parseNotation();
            } else {
                skipUntil(Scanner.GT);
            }
        } else {
            // MAW Version 1.17
            // Previously, the parser would skip over unexpected tokens at the
            // upper level. Some invalid DTDs would still show up as valid.
            throw new DTDParseException(scanner.getUriId(),
                "Unexpected token: " + token.type.name + "(" + token.value + ")",
                scanner.getLineNumber(), scanner.getColumn());
        }
    }

    /**
     * Consumes tokens up to and including the first token of the given type.
     *
     * @param stopToken the token type to stop after
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the end of input is reached before the stop token
     */
    protected void skipUntil(TokenType stopToken)
        throws IOException {
        Token token = scanner.get();

        while (token.type != stopToken) {
            // Without this check an unterminated declaration would loop
            // forever on a scanner that keeps reporting EOF.
            if (token.type == Scanner.EOF) {
                throw new DTDParseException(scanner.getUriId(),
                    "Unexpected end of input while looking for " + stopToken.name,
                    scanner.getLineNumber(), scanner.getColumn());
            }
            token = scanner.get();
        }
    }

    /**
     * Consumes the next token and checks that it has the expected type.
     *
     * @param expected the required token type
     * @return the consumed token
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the token has a different type
     */
    protected Token expect(TokenType expected)
        throws IOException {
        Token token = scanner.get();

        if (token.type != expected) {
            // Show the offending token's value as well, when it has one.
            String found = token.type.name;
            if (token.value != null) {
                found = found + "(" + token.value + ")";
            }

            throw new DTDParseException(scanner.getUriId(),
                "Expected " + expected.name + " instead of " + found,
                scanner.getLineNumber(), scanner.getColumn());
        }

        return token;
    }

    /**
     * Parses the remainder of an {@code <!ELEMENT} declaration (the keyword
     * has already been consumed) and records the element in the DTD.
     *
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the declaration is malformed or the element already
     *                     has a content definition
     */
    protected void parseElement()
        throws IOException {
        Token name = expect(Scanner.IDENTIFIER);

        DTDElement element = (DTDElement) dtd.elements.get(name.value);

        if (element == null) {
            element = new DTDElement(name.value);
            dtd.elements.put(element.name, element);
        } else if (element.content != null) {
            // 070501 MAW: Since the ATTLIST tag can also cause an element to be created,
            // only throw this exception if the element has content defined, which
            // won't happen when you just create an ATTLIST. Thanks to
            // Jags Krishnamurthy of Object Edge for pointing out this problem -
            // originally the parser would let you define an element more than once.
            throw new DTDParseException(scanner.getUriId(),
                "Found second definition of element: " + name.value,
                scanner.getLineNumber(), scanner.getColumn());
        }

        dtd.items.addElement(element);
        parseContentSpec(scanner, element);

        expect(Scanner.GT);
    }

    /**
     * Parses an element's content specification: {@code EMPTY}, {@code ANY},
     * mixed content starting with {@code (#PCDATA}, or a parenthesised
     * choice/sequence of children. The result is stored in
     * {@code element.content}.
     *
     * @param scanner the scanner the first tokens are read from; the nested
     *                parse methods read from the parser's own scanner field
     * @param element the element whose content is being defined
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the content specification does not start with one
     *                     of the forms above
     */
    protected void parseContentSpec(Scanner scanner, DTDElement element)
        throws IOException {
        Token token = scanner.get();

        if (token.type == Scanner.IDENTIFIER) {
            if (token.value.equals("EMPTY")) {
                element.content = new DTDEmpty();
            } else if (token.value.equals("ANY")) {
                element.content = new DTDAny();
            } else {
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid token in element content spec " +
                        token.value,
                    scanner.getLineNumber(), scanner.getColumn());
            }
        } else if (token.type == Scanner.LPAREN) {
            // Only peek: parseMixed/parseChildren consume the token themselves.
            token = scanner.peek();

            if (token.type == Scanner.IDENTIFIER) {
                if (token.value.equals("#PCDATA")) {
                    parseMixed(element);
                } else {
                    parseChildren(element);
                }
            } else if (token.type == Scanner.LPAREN) {
                parseChildren(element);
            } else {
                // Previously fell through silently, leaving the content null.
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid token in element content spec: " +
                        token.type.name,
                    scanner.getLineNumber(), scanner.getColumn());
            }
        } else {
            // Previously fell through silently, leaving the content null.
            throw new DTDParseException(scanner.getUriId(),
                "Invalid token in element content spec: " +
                    token.type.name,
                scanner.getLineNumber(), scanner.getColumn());
        }
    }

    /**
     * Parses mixed content, {@code (#PCDATA)} or {@code (#PCDATA|a|b)*}.
     * The opening parenthesis has already been consumed and the
     * {@code #PCDATA} token has been peeked but not consumed.
     *
     * @param element the element whose content is being defined
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the mixed content is malformed
     */
    protected void parseMixed(DTDElement element)
        throws IOException {
        // MAW Version 1.19
        // Keep track of whether the mixed is #PCDATA only
        // Don't allow * after (#PCDATA), but allow after
        // (#PCDATA|foo|bar|baz)*
        boolean isPcdataOnly = true;

        DTDMixed mixed = new DTDMixed();

        mixed.add(new DTDPCData());

        // Consume the #PCDATA token that parseContentSpec only peeked at.
        scanner.get();

        element.content = mixed;

        for (; ; ) {
            Token token = scanner.get();

            if (token.type == Scanner.RPAREN) {
                token = scanner.peek();

                if (token.type == Scanner.ASTERISK) {
                    scanner.get();
                    mixed.cardinal = DTDCardinal.ZEROMANY;
                } else {
                    if (!isPcdataOnly) {
                        throw new DTDParseException(scanner.getUriId(),
                            "Invalid token in Mixed content type, '*' required after (#PCDATA|xx ...): " +
                                token.type.name, scanner.getLineNumber(), scanner.getColumn());
                    }

                    mixed.cardinal = DTDCardinal.NONE;
                }

                return;
            } else if (token.type == Scanner.PIPE) {
                // A '|' must be followed by an element name; anything else
                // used to be stored as a DTDName with a bogus or null value.
                token = expect(Scanner.IDENTIFIER);

                mixed.add(new DTDName(token.value));

                // MAW Ver. 1.19
                isPcdataOnly = false;
            } else {
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid token in Mixed content type: " +
                        token.type.name, scanner.getLineNumber(), scanner.getColumn());
            }
        }
    }

    /**
     * Parses element (children) content: a choice or sequence followed by
     * an optional cardinality marker. The opening parenthesis has already
     * been consumed.
     *
     * @param element the element whose content is being defined
     * @throws IOException if reading fails or the content model is malformed
     */
    protected void parseChildren(DTDElement element)
        throws IOException {
        DTDContainer choiceSeq = parseChoiceSequence();

        // parseCardinality consumes a trailing ?, * or + and yields NONE otherwise.
        choiceSeq.cardinal = parseCardinality();

        element.content = choiceSeq;
    }

    /**
     * Parses the items of a choice ({@code a|b}) or sequence ({@code a,b})
     * up to and including the closing parenthesis. A group with a single
     * item and no separator is returned as a sequence.
     *
     * @return the parsed container
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     separators are mixed or an invalid token is found
     */
    protected DTDContainer parseChoiceSequence()
        throws IOException {
        TokenType separator = null;

        DTDContainer cs = null;

        for (; ; ) {
            DTDItem item = parseCP();

            Token token = scanner.get();

            if ((token.type == Scanner.PIPE) ||
                (token.type == Scanner.COMMA)) {
                if ((separator != null) && (separator != token.type)) {
                    throw new DTDParseException(scanner.getUriId(),
                        "Can't mix separators in a choice/sequence",
                        scanner.getLineNumber(), scanner.getColumn());
                }
                separator = token.type;

                // The first separator decides which kind of container this is.
                if (cs == null) {
                    if (token.type == Scanner.PIPE) {
                        cs = new DTDChoice();
                    } else {
                        cs = new DTDSequence();
                    }
                }
                cs.add(item);
            } else if (token.type == Scanner.RPAREN) {
                if (cs == null) {
                    cs = new DTDSequence();
                }
                cs.add(item);
                return cs;
            } else {
                throw new DTDParseException(scanner.getUriId(),
                    "Found invalid token in sequence: " +
                        token.type.name, scanner.getLineNumber(), scanner.getColumn());
            }
        }
    }

    /**
     * Parses one content particle: an element name or a nested
     * parenthesised group, followed by an optional cardinality marker.
     *
     * @return the parsed item with its cardinality set
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the next token is neither a name nor '('
     */
    protected DTDItem parseCP()
        throws IOException {
        Token token = scanner.get();

        DTDItem item = null;

        if (token.type == Scanner.IDENTIFIER) {
            item = new DTDName(token.value);
        } else if (token.type == Scanner.LPAREN) {
            item = parseChoiceSequence();
        } else {
            throw new DTDParseException(scanner.getUriId(),
                "Found invalid token in sequence: " +
                    token.type.name, scanner.getLineNumber(),
                scanner.getColumn());
        }

        item.cardinal = parseCardinality();

        return item;
    }

    /**
     * Reads an optional cardinality marker. The marker is consumed only if
     * one is present.
     *
     * @return OPTIONAL for '?', ZEROMANY for '*', ONEMANY for '+',
     *         otherwise NONE
     * @throws IOException if reading fails
     */
    protected DTDCardinal parseCardinality()
        throws IOException {
        Token token = scanner.peek();

        if (token.type == Scanner.QUES) {
            scanner.get();
            return DTDCardinal.OPTIONAL;
        } else if (token.type == Scanner.ASTERISK) {
            scanner.get();
            return DTDCardinal.ZEROMANY;
        } else if (token.type == Scanner.PLUS) {
            scanner.get();
            return DTDCardinal.ONEMANY;
        } else {
            return DTDCardinal.NONE;
        }
    }

    /**
     * Parses the remainder of an {@code <!ATTLIST} declaration (the keyword
     * has already been consumed). If the element has not been declared yet,
     * a content-less element is created to hold the attributes.
     *
     * @throws IOException if reading fails or the declaration is malformed
     */
    protected void parseAttlist()
        throws IOException {
        Token token = expect(Scanner.IDENTIFIER);

        DTDElement element = (DTDElement) dtd.elements.get(token.value);

        DTDAttlist attlist = new DTDAttlist(token.value);

        dtd.items.addElement(attlist);

        if (element == null) {
            element = new DTDElement(token.value);
            dtd.elements.put(token.value, element);
        }

        token = scanner.peek();

        while (token.type != Scanner.GT) {
            parseAttdef(scanner, element, attlist);
            token = scanner.peek();
        }
        // MAW Version 1.17
        // Prior to this version, the parser didn't actually consume the > at the
        // end of the ATTLIST definition. Because the parser ignored unexpected tokens
        // at the top level, it was ignoring the >. In parsing DOCBOOK, however, there
        // were two unexpected tokens, bringing this error to light.
        expect(Scanner.GT);
    }

    /**
     * Parses a single attribute definition (name, type and default
     * declaration) and registers it with both the attribute list and the
     * element.
     *
     * @param scanner the scanner the type and default tokens are read from
     * @param element the element that owns the attribute
     * @param attlist the attribute list the definition belongs to
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the definition is malformed
     */
    protected void parseAttdef(Scanner scanner, DTDElement element,
                               DTDAttlist attlist)
        throws IOException {
        Token token = expect(Scanner.IDENTIFIER);

        DTDAttribute attr = new DTDAttribute(token.value);

        attlist.attributes.addElement(attr);

        element.attributes.put(token.value, attr);

        // Attribute type: a keyword, NOTATION (...), or an enumeration (...).
        token = scanner.get();

        if (token.type == Scanner.IDENTIFIER) {
            if (token.value.equals("NOTATION")) {
                attr.type = parseNotationList();
            } else {
                attr.type = token.value;
            }
        } else if (token.type == Scanner.LPAREN) {
            attr.type = parseEnumeration();
        }

        // Default declaration: #FIXED "v", #REQUIRED, #IMPLIED or "v".
        token = scanner.peek();

        if (token.type == Scanner.IDENTIFIER) {
            scanner.get();
            if (token.value.equals("#FIXED")) {
                attr.decl = DTDDecl.FIXED;

                // #FIXED must be followed by the quoted value; previously any
                // token was accepted and its value stored.
                token = expect(Scanner.STRING);
                attr.defaultValue = token.value;
            } else if (token.value.equals("#REQUIRED")) {
                attr.decl = DTDDecl.REQUIRED;
            } else if (token.value.equals("#IMPLIED")) {
                attr.decl = DTDDecl.IMPLIED;
            } else {
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid token in attribute declaration: " +
                        token.value, scanner.getLineNumber(), scanner.getColumn());
            }
        } else if (token.type == Scanner.STRING) {
            scanner.get();
            attr.decl = DTDDecl.VALUE;
            attr.defaultValue = token.value;
        }
    }

    /**
     * Parses the parenthesised, '|'-separated list of names that follows
     * the {@code NOTATION} keyword in an attribute type.
     *
     * @return the list of notation names
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the list is malformed
     */
    protected DTDNotationList parseNotationList()
        throws IOException {
        DTDNotationList notation = new DTDNotationList();

        Token token = scanner.get();
        if (token.type != Scanner.LPAREN) {
            throw new DTDParseException(scanner.getUriId(),
                "Invalid token in notation: " +
                    token.type.name, scanner.getLineNumber(),
                scanner.getColumn());
        }

        for (; ; ) {
            token = scanner.get();

            if (token.type != Scanner.IDENTIFIER) {
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid token in notation: " +
                        token.type.name, scanner.getLineNumber(),
                    scanner.getColumn());
            }

            notation.add(token.value);

            token = scanner.peek();

            if (token.type == Scanner.RPAREN) {
                scanner.get();
                return notation;
            } else if (token.type != Scanner.PIPE) {
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid token in notation: " +
                        token.type.name, scanner.getLineNumber(),
                    scanner.getColumn());
            }
            scanner.get(); // eat the pipe
        }
    }

    /**
     * Parses the '|'-separated values of an enumerated attribute type up to
     * and including the closing parenthesis. The opening parenthesis has
     * already been consumed.
     *
     * @return the enumeration of allowed values
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the enumeration is malformed
     */
    protected DTDEnumeration parseEnumeration()
        throws IOException {
        DTDEnumeration enumeration = new DTDEnumeration();

        for (; ; ) {
            Token token = scanner.get();

            if ((token.type != Scanner.IDENTIFIER) &&
                (token.type != Scanner.NMTOKEN)) {
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid token in enumeration: " +
                        token.type.name, scanner.getLineNumber(),
                    scanner.getColumn());
            }

            enumeration.add(token.value);

            token = scanner.peek();

            if (token.type == Scanner.RPAREN) {
                scanner.get();
                return enumeration;
            } else if (token.type != Scanner.PIPE) {
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid token in enumeration: " +
                        token.type.name, scanner.getLineNumber(),
                    scanner.getColumn());
            }
            scanner.get(); // eat the pipe
        }
    }

    /**
     * Parses the remainder of an {@code <!ENTITY} declaration (the keyword
     * has already been consumed). A leading '%' marks a parameter entity,
     * which is recorded with {@code isParsed} set to true. Only the first
     * declaration of a name is stored in the DTD's entity table and handed
     * to the scanner; later declarations are still added to the item list.
     *
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the declaration is malformed
     */
    protected void parseEntity()
        throws IOException {
        boolean isParsed = false;

        Token name = scanner.get();

        if (name.type == Scanner.PERCENT) {
            isParsed = true;
            name = expect(Scanner.IDENTIFIER);
        } else if (name.type != Scanner.IDENTIFIER) {
            throw new DTDParseException(scanner.getUriId(),
                "Invalid entity declaration",
                scanner.getLineNumber(), scanner.getColumn());
        }

        // 070501 MAW: If the entity already exists, create a dummy entity - this way
        // you keep the original definition.  Thanks to Jags Krishnamurthy of Object
        // Edge for pointing out this problem and for pointing out the solution
        boolean alreadyDeclared = dtd.entities.get(name.value) != null;

        DTDEntity entity = new DTDEntity(name.value, defaultLocation);
        if (!alreadyDeclared) {
            dtd.entities.put(entity.name, entity);
        }

        dtd.items.addElement(entity);

        entity.isParsed = isParsed;

        parseEntityDef(entity);

        if (entity.isParsed && (entity.value != null) && !alreadyDeclared) {
            scanner.addEntity(entity.name, entity.value);
        }
    }

    /**
     * Parses an entity definition: either a literal value, or a
     * {@code SYSTEM}/{@code PUBLIC} external ID optionally followed (for
     * entities not flagged {@code isParsed}) by {@code NDATA name}. Also
     * consumes the closing '&gt;'.
     *
     * @param entity the entity to populate
     * @throws IOException if reading fails, or (as a DTDParseException) if
     *                     the definition is malformed
     */
    protected void parseEntityDef(DTDEntity entity)
        throws IOException {
        Token token = scanner.get();

        if (token.type == Scanner.STRING) {
            // Only set the entity value if it hasn't been set yet
            // XML 1.0 spec says that you use the first value of
            // an entity, not the most recent.
            if (entity.value == null) {
                entity.value = token.value;
            }
        } else if (token.type == Scanner.IDENTIFIER) {
            if (token.value.equals("SYSTEM")) {
                DTDSystem sys = new DTDSystem();
                token = expect(Scanner.STRING);

                sys.system = token.value;
                entity.externalID = sys;
            } else if (token.value.equals("PUBLIC")) {
                DTDPublic pub = new DTDPublic();

                token = expect(Scanner.STRING);
                pub.pub = token.value;
                token = expect(Scanner.STRING);
                pub.system = token.value;
                entity.externalID = pub;
            } else {
                throw new DTDParseException(scanner.getUriId(),
                    "Invalid External ID specification",
                    scanner.getLineNumber(), scanner.getColumn());
            }

            // isParsed is true for parameter entities (see parseEntity).
            // NDATA is only allowed on general entities, so it is only
            // looked for when isParsed is false:
            //
            //Entity Declaration
            // [70] EntityDecl ::= GEDecl | PEDecl
            // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
            // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
            // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
            // [74] PEDef ::= EntityValue | ExternalID
            //External Entity Declaration
            // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
            //          | 'PUBLIC' S PubidLiteral S SystemLiteral
            // [76] NDataDecl ::= S 'NDATA' S Name [ VC: Notation Declared ]
            if (!entity.isParsed) {
                token = scanner.peek();
                if (token.type == Scanner.IDENTIFIER) {
                    if (!token.value.equals("NDATA")) {
                        throw new DTDParseException(scanner.getUriId(),
                            "Invalid NData declaration",
                            scanner.getLineNumber(), scanner.getColumn());
                    }
                    // Consume the peeked "NDATA" keyword.
                    scanner.get();
                    // Get the NDATA "Name" IDENTIFIER.
                    token = expect(Scanner.IDENTIFIER);
                    // Save the ndata value
                    entity.ndata = token.value;
                }
            }
        } else {
            throw new DTDParseException(scanner.getUriId(),
                "Invalid entity definition",
                scanner.getLineNumber(), scanner.getColumn());
        }

        expect(Scanner.GT);
    }

    /**
     * Parses the remainder of a {@code <!NOTATION} declaration (the keyword
     * has already been consumed) and records the notation in the DTD.
     *
     * @throws java.io.IOException if reading fails or the declaration is
     *                             malformed
     */
    protected void parseNotation()
        throws java.io.IOException {
        DTDNotation notation = new DTDNotation();

        Token token = expect(Scanner.IDENTIFIER);

        notation.name = token.value;

        dtd.notations.put(notation.name, notation);
        dtd.items.addElement(notation);

        token = expect(Scanner.IDENTIFIER);

        if (token.value.equals("SYSTEM")) {
            DTDSystem sys = new DTDSystem();
            token = expect(Scanner.STRING);

            sys.system = token.value;
            notation.externalID = sys;
        } else if (token.value.equals("PUBLIC")) {
            DTDPublic pub = new DTDPublic();
            token = expect(Scanner.STRING);

            pub.pub = token.value;
            pub.system = null;

            // For <!NOTATION>, you can have PUBLIC PubidLiteral without
            // a SystemLiteral
            token = scanner.peek();
            if (token.type == Scanner.STRING) {
                token = scanner.get();
                pub.system = token.value;
            }

            notation.externalID = pub;
        }
        expect(Scanner.GT);
    }

    /**
     * Looks up an already-declared entity by name.
     *
     * @param name the entity name
     * @return the entity stored under that name, or null if there is none
     */
    public DTDEntity expandEntity(String name) {
        return (DTDEntity) dtd.entities.get(name);
    }
}
